Set the right working directory.
# Make the SPPU template folder the working directory; the relative file names
# passed to read.table() further down are resolved against it.
# NOTE(review): absolute, machine-specific path - this only runs unchanged on a
# computer that has this exact folder.
setwd("C:/Users/elise/Documents/Mémoire/Main/Data/Templates/SPPU")
Import the data sets extracted from the Data Preparation R Markdown.
# Show which files are present in the current working directory.
list.files(path = ".")
## [1] "endpoint.txt" "plant_info.txt" "S_timeseries.txt" "T_timeseries.txt"
## [5] "timeseries.txt"
# Load three of the tab-separated templates; the first line of each file
# provides the column names.
plant_info <- read.table(
  file = "plant_info.txt",
  sep = "\t",
  header = TRUE
)
S_timeseries <- read.table(
  file = "S_timeseries.txt",
  sep = "\t",
  header = TRUE
)
T_timeseries <- read.table(
  file = "T_timeseries.txt",
  sep = "\t",
  header = TRUE
)
Convert the columns to factor and date formats.
# Convert identifier columns to factors and the time columns to date-time /
# date classes.

# Re-encode the columns named in `cols` as factors and return the data frame.
#   df:   a data frame.
#   cols: character vector of column names present in `df` (may be empty).
# List-style indexing (`df[cols]`) is used on purpose: with `df[, cols]` a
# single matching column is dropped to a bare vector and lapply() would then
# iterate over its elements instead of over columns.
convert_to_factor <- function(df, cols) {
  if (length(cols) > 0L) {
    df[cols] <- lapply(df[cols], factor)
  }
  df
}

# plant_info
# `plant_info[] <-` keeps the data frame structure; assigning the bare lapply()
# result would replace the data frame with a plain list.
plant_info[] <- lapply(plant_info, factor)

# S_timeseries
# Only the columns that also exist in plant_info are turned into factors.
matching_cols <- intersect(names(S_timeseries), names(plant_info))
S_timeseries <- convert_to_factor(S_timeseries, matching_cols)
# No `tz` is given, so the timestamps are interpreted in the session time zone.
S_timeseries$Timestamp <- as.POSIXct(S_timeseries$Timestamp, format = "%Y-%m-%d %H:%M:%S")
# NOTE(review): base::date() takes no argument, so this presumably relies on
# lubridate::date() being attached earlier in the document - confirm.
S_timeseries$Date <- date(S_timeseries$Date)

# T_timeseries
matching_cols <- intersect(names(T_timeseries), names(plant_info))
T_timeseries <- convert_to_factor(T_timeseries, matching_cols)
T_timeseries$Timestamp <- as.POSIXct(T_timeseries$Timestamp, format = "%Y-%m-%d %H:%M:%S")
T_timeseries$Date <- date(T_timeseries$Date)
Collect the variables of every data template and print the names of the variables. This serves as a double check.
platform <- "SPPU"

# Return the names of the columns located from position `first` up to the
# column just before "Genotype".
#   df:             data frame whose column names are inspected.
#   genotype_index: position(s) of the "Genotype" column, as returned by which().
#   first:          position of the first variable column (default 5).
# Returns a character vector; it is empty when no column lies between `first`
# and "Genotype". Stops with an explicit message when "Genotype" is absent.
list_measured_variables <- function(df, genotype_index, first = 5L) {
  if (length(genotype_index) == 0L) {
    stop("No 'Genotype' column found in the data frame.", call. = FALSE)
  }
  last <- genotype_index[1L] - 1L
  # Guard against `first:last` counting backwards when Genotype sits at or
  # before position `first`.
  if (last < first) {
    return(character(0))
  }
  # Index the name vector directly so a single variable is still returned by
  # name (subsetting the data frame would drop it to an unnamed vector).
  colnames(df)[first:last]
}

# S_timeseries
# Keep only the columns that hold at least one non-missing value.
df_S_timeseries <- S_timeseries[, colSums(is.na(S_timeseries)) < nrow(S_timeseries), drop = FALSE]
genotype_index <- which(colnames(df_S_timeseries) == "Genotype")
# Variables start at column 5, i.e. the first four columns are skipped.
# NOTE(review): the earlier comment named only three skipped columns
# ("Unit.ID", "Time" and "Date") - confirm what the first four columns are.
variables_S <- list_measured_variables(df_S_timeseries, genotype_index)

# T_timeseries
df_T_timeseries <- T_timeseries[, colSums(is.na(T_timeseries)) < nrow(T_timeseries), drop = FALSE]
genotype_index <- which(colnames(df_T_timeseries) == "Genotype")
variables_T <- list_measured_variables(df_T_timeseries, genotype_index)

# Print the collected names as a visual double check.
print(paste(platform, ": The variables for S_timeseries are", paste(variables_S, collapse = ", "), sep = " "))
## [1] "SPPU : The variables for S_timeseries are S_Height_cm, S_Height_pixel, S_Area_cmsquared, S_Area_pixel, S_Perimeter_cm, S_Perimeter_pixel, S_Compactness, S_Width_cm, S_Width_pixel"
print(paste(platform, ": The variables for T_timeseries are", paste(variables_T, collapse = ", "), sep = " "))
## [1] "SPPU : The variables for T_timeseries are T_Area_cm_squared, T_Area_pixel, T_Perimeter_cm, T_Perimeter_pixel, T_Compactness, T_Roundness, T_Roundness2, T_Isotropy, T_Eccentricity, T_Rms, T_Sol"
Add a column Plant_type, taken from the last character of the genotype name, with three levels: H, L and T. This variable is useful to test for heterosis effects.
# Plant_type is the final character of each genotype label.
S_timeseries$Plant_type <- local({
  genotype_label <- as.character(S_timeseries$Genotype)
  last_position <- nchar(genotype_label)
  substr(genotype_label, last_position, last_position)
})
T_timeseries$Plant_type <- local({
  genotype_label <- as.character(T_timeseries$Genotype)
  last_position <- nchar(genotype_label)
  substr(genotype_label, last_position, last_position)
})
# Placeholder messages for sections that have no data on this platform.
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
# A separating space is included so this message reads like the one above.
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
Using several functions that are located in the functions.R script
# Placeholder messages for sections that have no data on this platform; a
# space separates the text from the platform name.
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
Test for normality hypothesis and plot density histogram. The red curve is the normal distribution, the blue dotted curve is the data density curve.
# Placeholder message for a section that has no data on this platform; a space
# separates the text from the platform name.
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
Remove the outliers by replacing them with missing values (NA), then visually check normality again.
The function detect_replace_ouliers_by_genotype checks for outlying values, using the Tukey method.
Then run the function on all variables of the dataset.
# Placeholder messages for sections that have no data on this platform; a
# space separates the text from the platform name.
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
In this part, we look at the timeseries, S_timeseries and T_timeseries datasets, also using several functions, located in the functions.R script.
# Build a stacked bar chart of the number of observations per day, with the
# bars filled by genotype.
#   data:         data frame with `Date` and `Genotype` columns (scale_x_date()
#                 expects `Date` to be of class Date).
#   dataset_name: label appended to the plot title.
# Returns the ggplot object.
plot_observations_per_day <- function(data, dataset_name) {
  ggplot(data, aes(x = Date)) +
    geom_bar(aes(fill = Genotype), position = "stack", width = 0.96) +
    scale_fill_viridis_d(option = "D") +
    labs(
      x = "Date",
      y = "Number of observations",
      title = paste("Observations per day for", dataset_name)
    ) +
    scale_y_continuous(breaks = seq(from = 0, to = 325, by = 25)) +
    scale_x_date(date_breaks = "2 days", date_labels = "%d-%m-%Y") +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      # `linewidth` replaces the `size` argument of element_line(), which is
      # deprecated as of ggplot2 3.4.0.
      panel.grid.major.x = element_line(color = "lightgray", linewidth = 0.5),
      panel.grid.minor.x = element_blank()
    )
}

h2 <- plot_observations_per_day(S_timeseries, "S_timeseries")
h3 <- plot_observations_per_day(T_timeseries, "T_timeseries")
h2
h3
Scatter plots by Genotype
# Placeholder message for a section that has no data on this platform; a space
# separates the text from the platform name.
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
Scatter plots for all genotypes by plant type (Hybrid, Line, EPPN20_T) with a smooth line.
# Placeholder message for a section that has no data on this platform; a space
# separates the text from the platform name.
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
Scatter plots for all genotypes by water treatment
# Placeholder message for a section that has no data on this platform; a space
# separates the text from the platform name.
print(paste0("No data for ", platform))
## [1] "No data for SPPU"
Scatter plots by Genotype
# Call plot_scatter_by_genotype() on S_timeseries for the names in variables_S.
# The function is not defined in this part of the file (the text refers to a
# functions.R script).
# NOTE(review): the third argument is "EPPN_T" while the section headings
# mention "EPPN20_T" - confirm which label the data actually uses.
plot_scatter_by_genotype(S_timeseries, variables_S, "EPPN_T")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
Scatter plots for all genotypes by plant type (Hybrid, Line, EPPN20_T) with a smooth line.
# Call plot_scatter_with_smooth() on S_timeseries for the names in variables_S.
# The function is not defined in this part of the file (the text refers to a
# functions.R script).
plot_scatter_with_smooth(S_timeseries, variables_S)
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Warning: Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 31 rows containing missing values (`geom_point()`).
## Warning: Removed 31 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 31 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Warning: Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
Scatter plots for all genotypes by water treatment
# Call plot_scatter_with_smooth_water() on S_timeseries for the names in
# variables_S. The function is not defined in this part of the file (the text
# refers to a functions.R script).
plot_scatter_with_smooth_water(S_timeseries, variables_S)
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Warning: Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 31 rows containing missing values (`geom_point()`).
## Warning: Removed 31 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 31 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Warning: Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values (`geom_point()`).
## Removed 30 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (`stat_smooth()`).
Scatter plots by Genotype
# Call plot_scatter_by_genotype() on T_timeseries for the names in variables_T.
# The function is not defined in this part of the file (the text refers to a
# functions.R script).
# NOTE(review): the third argument is "EPPN_T" while the section headings
# mention "EPPN20_T" - confirm which label the data actually uses.
plot_scatter_by_genotype(T_timeseries, variables_T, "EPPN_T")
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
Scatter plots for all genotypes by plant type (Hybrid, Line, EPPN20_T) with a smooth line.
# Call plot_scatter_with_smooth() on T_timeseries for the names in variables_T.
# The function is not defined in this part of the file (the text refers to a
# functions.R script).
plot_scatter_with_smooth(T_timeseries, variables_T)
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Warning: Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 27 rows containing missing values (`geom_point()`).
## Warning: Removed 26 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 27 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Warning: Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
Scatter plots for all genotypes by water treatment
# Call plot_scatter_with_smooth_water() on T_timeseries for the names in
# variables_T. The function is not defined in this part of the file (the text
# refers to a functions.R script).
plot_scatter_with_smooth_water(T_timeseries, variables_T)
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Warning: Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 27 rows containing missing values (`geom_point()`).
## Warning: Removed 26 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 27 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Warning: Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).